Data Overview

# Load the character list CSV with explicit column types.
# `age` is read as text first and converted with as.numeric() afterwards, so
# entries that cannot be parsed as numbers end up as NA.
characters_list <- readr::read_csv(
  here("data/character_list5.csv"),
  progress = FALSE,
  col_types = cols(
    script_id = col_integer(),
    imdb_character_name = col_character(),
    words = col_integer(),
    gender = col_character(),
    age = col_character()
  )
) %>%
  mutate(age = as.numeric(age))

# Load the per-script metadata CSV with explicit column types, then
# re-encode the titles from latin1 to UTF-8.
meta_data <- readr::read_csv(
  here("data/meta_data7.csv"),
  progress = FALSE,
  col_types = cols(
    script_id = col_integer(),
    imdb_id = col_character(),
    title = col_character(),
    year = col_integer(),
    gross = col_integer(),
    lines_data = col_character()
  )
) %>%
  mutate(title = iconv(title, "latin1", "UTF-8"))


Combinando Dados Originais

# Attach the script metadata to every character row (keyed on script_id) and
# keep only the rows whose film has a known gross.
# NOTE(review): the group_by()/ungroup() pair does not appear to influence
# drop_na(); kept as in the original -- confirm whether it can be removed.
scripts_data <- characters_list %>%
  left_join(meta_data, by = "script_id") %>%
  group_by(title, year) %>%
  drop_na(gross) %>%
  ungroup()

# Quick structural overview of the joined table.
glimpse(scripts_data)
Observations: 19,387
Variables: 10
$ script_id           <int> 280, 280, 280, 280, 280, 280, 280, 623, 623, 623, 623, 623, 623, 623...
$ imdb_character_name <chr> "betty", "carolyn johnson", "eleanor", "francesca johns", "madge", "...
$ words               <int> 311, 873, 138, 2251, 190, 723, 1908, 328, 409, 347, 2020, 366, 160, ...
$ gender              <chr> "f", "f", "f", "f", "f", "m", "m", "m", "f", "m", "m", "m", "m", "m"...
$ age                 <dbl> 35, NA, NA, 46, 46, 38, 65, NA, 28, NA, 58, 53, 25, 39, 33, NA, 34, ...
$ imdb_id             <chr> "tt0112579", "tt0112579", "tt0112579", "tt0112579", "tt0112579", "tt...
$ title               <chr> "The Bridges of Madison County", "The Bridges of Madison County", "T...
$ year                <int> 1995, 1995, 1995, 1995, 1995, 1995, 1995, 2001, 2001, 2001, 2001, 20...
$ gross               <int> 142, 142, 142, 142, 142, 142, 142, 37, 37, 37, 37, 37, 37, 37, 37, 3...
$ lines_data          <chr> "4332023434343443203433434334433434343434434344344333434443444344233...
# Per-film female/male ratios.
#   fem_words / man_words: the character's word count if the character is
#     female / male, 0 otherwise.
#   total_*_words: those counts summed over each (title, year) film.
#   f_m_ratio: number of female characters divided by number of male ones.
#   f_m_wordratio: female words divided by male words.
# Films with zero female words or zero male words are dropped before the
# ratios are computed, which avoids 0 and Inf ratios.
scripts_data <- scripts_data %>%
  mutate(
    fem_words = ifelse(gender == "f", words, 0),
    man_words = ifelse(gender == "m", words, 0)
  ) %>%
  group_by(title, year) %>%
  mutate(
    total_fem_words = sum(fem_words),
    total_man_words = sum(man_words)
  ) %>%
  filter(total_fem_words != 0, total_man_words != 0) %>%
  mutate(
    f_m_ratio = sum(gender == "f") / sum(gender == "m"),
    f_m_wordratio = total_fem_words / total_man_words
  ) %>%
  ungroup()

# Show a random sample of ten rows with the new ratio columns.
scripts_data %>%
  select(title, year, f_m_ratio, f_m_wordratio) %>%
  sample_n(10)

Exploração dos Dados

Proporção entre diálogo feminino e masculino

# Relative-frequency histogram of the female/male word ratio.
# The first row of every (title, year) group is kept so each film is counted
# once; bar heights are count / sum(count).
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = f_m_wordratio, y = (..count..) / sum(..count..))) +
  geom_histogram(
    binwidth = 0.1,
    boundary = 0,
    fill = "grey",
    color = "black"
  ) +
  labs(y = "Frequência Relativa")

  • Em alguns raríssimos exemplos há muito mais diálogo feminino que masculino.
# Same relative-frequency histogram of the female/male word ratio, restricted
# to films whose ratio is below 10 so the bulk of the distribution is visible.
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  filter(f_m_wordratio < 10) %>%
  ggplot(aes(x = f_m_wordratio, y = (..count..) / sum(..count..))) +
  geom_histogram(
    binwidth = 0.1,
    fill = "grey",
    color = "black"
  ) +
  labs(y = "Frequência Relativa")

  • Uma vez que filtramos os casos mais raros, é possível ver que há um forte domínio do diálogo masculino sobre o feminino nos filmes.
# Violin plot of the female/male word ratio, one observation per film
# (first row of each title/year group).
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = "", y = f_m_wordratio)) +
  geom_violin(fill = "grey", width = 0.5)

  • É ainda mais óbvio:
    • A presença de alguns poucos casos de completo domínio do diálogo feminino
    • O domínio geral do diálogo masculino sobre o feminino

Proporção entre personagens femininos e masculinos

# Relative-frequency histogram of the female/male character-count ratio,
# one observation per film, with x-axis ticks every 0.5 from 0 to 10.
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = f_m_ratio, y = (..count..) / sum(..count..))) +
  geom_histogram(
    binwidth = 0.1,
    boundary = 0,
    fill = "grey",
    color = "black"
  ) +
  scale_x_continuous(breaks = seq(0, 10, 0.5)) +
  labs(y = "Frequência Relativa")

  • É nítido o domínio de personagens masculinos
# Violin plot of the female/male character-count ratio, one observation per
# film (first row of each title/year group).
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = "", y = f_m_ratio)) +
  geom_violin(fill = "grey", width = 0.5)

  • Além do forte domínio de personagens masculinos, é possível ver a existência de algumas instâncias, embora raras, de uma avassaladora presença feminina (e.g., 10 vezes mais mulheres que homens).

Ano do filme

# Number of films per release year (absolute frequency), counting each
# (title, year) film once.
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = year)) +
  geom_bar(fill = "grey", color = "black") +
  labs(y = "Frequência Absoluta")

  • Os filmes são, em sua maioria, recentes: a quase totalidade dos filmes foi lançada a partir dos anos 1990.
# Violin plot of the release year, one observation per film
# (first row of each title/year group).
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = "", y = year)) +
  geom_violin(fill = "grey", width = 0.5)

  • Ainda é possível ver uma presença relevante de filmes do começo dos anos 1980.
  • Existem alguns filmes anteriores aos próprios anos 1950.

Faturamento do filme

# Relative-frequency histogram of film gross (bins of width 50 starting at 0),
# one observation per film.
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = gross, y = (..count..) / sum(..count..))) +
  geom_histogram(
    binwidth = 50,
    boundary = 0,
    fill = "grey",
    color = "black"
  ) +
  labs(y = "Frequência Relativa")

  • Faturamento baixo ou razoável para a maior parte dos filmes.
  • Alguns poucos filmes tiveram um faturamento esmagador.
# Violin plot of film gross, one observation per film
# (first row of each title/year group).
scripts_data %>%
  group_by(title, year) %>%
  slice(1) %>%
  unique() %>%
  ggplot(aes(x = "", y = gross)) +
  geom_violin(fill = "grey", width = 0.5)

  • Resultados similares aos do respectivo histograma.

Aplicando escala apropriada aos dados.

# Build the clustering table: one row per title with the three features
# (gross, character ratio, word ratio).
# NOTE(review): this groups by title only, whereas the exploratory plots group
# by (title, year); films that share a title are collapsed into a single row
# here -- confirm this is intended.
scripts_data %>%
  group_by(title) %>%
  slice(1) %>%
  unique() %>%
  ungroup() %>%
  select(title,
         gross,
         f_m_ratio,
         f_m_wordratio) -> data

# Standardise every feature with scale() (centre and divide by the standard
# deviation). The function is passed directly: funs() is deprecated in dplyr
# and passing the bare function applies scale() to each column identically.
select(data, -title) %>%
  mutate_all(scale) -> scaled_data

# Show a random sample of ten standardised rows.
scaled_data %>%
  sample_n(10)

Número K ótimo

Estatística GAP

A estatística GAP compara a solução do agrupamento com cada k com a solução em um dataset onde não há estrutura de grupos.

# Plot the gap statistic against the number of clusters, with error bars of
# +/- one simulation standard error.
#
# Args:
#   clusgap: object with a `Tab` table containing `gap` and `SE.sim` columns,
#     one row per number of clusters (here, the result of clusGap()).
#   title: plot title.
#
# Returns:
#   A ggplot object.
plot_clusgap <- function(clusgap,
                         title = "Gap Statistic calculation results") {
  # require() only returns FALSE when the package is missing, which would
  # surface later as an obscure "could not find function" error; fail early.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required by plot_clusgap().", call. = FALSE)
  }
  # seq_len() stays correct (empty) when Tab has no rows, unlike 1:nrow().
  gap_table <- data.frame(clusgap$Tab, k = seq_len(nrow(clusgap$Tab)))
  ggplot2::ggplot(gap_table, ggplot2::aes(k, gap)) +
    ggplot2::geom_line() +
    ggplot2::geom_point(size = 5) +
    ggplot2::geom_errorbar(
      ggplot2::aes(ymax = gap + SE.sim, ymin = gap - SE.sim),
      width = .2
    ) +
    ggplot2::ggtitle(title)
}
# Gap statistic for k = 1..8 using k-means (20 random starts, at most 30
# iterations per fit) and 200 bootstrap reference samples.
# Both k-means and the bootstrap are random, so the seed is fixed to make the
# gap curve (and the k read from it) reproducible between runs.
set.seed(123)
gaps <- scaled_data %>%
  clusGap(
    FUN = kmeans,
    nstart = 20,
    K.max = 8,
    B = 200,
    iter.max = 30
  )
Clustering k = 1,2,..., K.max (= 8): .. done
Bootstrapping, b = 1,2,..., B (= 200)  [one "." per sample]:
.................................................. 50 
.................................................. 100 
.................................................. 150 
.................................................. 200 
# Draw the gap statistic curve computed above.
gaps %>%
  plot_clusgap()

  • 3 ou 6 grupos parecem apropriados, mas, como 6 é precedido por uma série de quedas, 3 seria uma melhor opção.

Elbow Method

set.seed(123)
# Compute and plot the total within-cluster sum of squares (WSS) for
# k = 1 to k = 15.
k.max <- 15
# vapply() guarantees a numeric vector with exactly one WSS value per k.
wss <- vapply(
  seq_len(k.max),
  function(k) {
    kmeans(scaled_data, k, nstart = 50, iter.max = 15)$tot.withinss
  },
  numeric(1)
)
plot(seq_len(k.max), wss,
     type = "b", pch = 19, frame.plot = FALSE,
     xlab = "Number of clusters K",
     ylab = "Total within-clusters sum of squares")

  • Pelo Elbow method 3 parece ser um bom número de grupos devido à queda de 3 para 4.

Bayesian Information Criterion

  • Visualmente K= 2 e K = 3 representam o ganho mais significativo em termos de BIC (Bayesian Information Criterion)

Hubert Index e D Index

# Run every NbClust index ("all") on the standardised data, using k-means on
# Euclidean distances, for 2 to 5 clusters.
nb <- NbClust(
  scaled_data,
  diss = NULL,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 5,
  method = "kmeans",
  index = "all",
  alphaBeale = 0.1
)
*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot. 
 

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 5 proposed 2 as the best number of clusters 
* 8 proposed 3 as the best number of clusters 
* 2 proposed 4 as the best number of clusters 
* 8 proposed 5 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
******************************************************************* 

# Histogram of the first row of nb$Best.nc (presumably the number of clusters
# proposed by each index -- confirm against the NbClust documentation), using
# its largest non-missing value as the suggested number of bins.
hist(
  nb$Best.nc[1, ],
  breaks = max(nb$Best.nc[1, ], na.rm = TRUE)
)

  • O índice de Hubert e o índice D sugerem K = 3 como a melhor solução

K-Means


Agrupamento

n_clusters <- 3

# k-means starts from random centres, so the cluster numbering (1, 2, 3) is
# arbitrary from run to run. The surrounding text refers to groups by number,
# so the seed is fixed to keep the numbering reproducible.
set.seed(123)
km <- kmeans(scaled_data, centers = n_clusters, iter.max = 100, nstart = 20)

# Interactive plot of the clustered observations with a frame per cluster.
p <- autoplot(km, data = scaled_data, frame = TRUE)
ggplotly(p)
  • É possível ver os 3 grupos nitidamente distintos, por meio de um zoom percebe-se que embora o grupo 1 e o grupo 3 estejam próximos o overlap é basicamente inexistente.
# Carry the film titles along as row names, then turn them into a column.
row.names(scaled_data) <- data$title
toclust <- scaled_data %>%
  rownames_to_column(var = "title")

# k-means starts from random centres, so the cluster numbering is arbitrary
# from run to run. The groups are described by number in the text, so the seed
# is fixed to keep the numbering reproducible.
set.seed(123)
km <- toclust %>%
  select(-title) %>%
  kmeans(centers = n_clusters, iter.max = 100, nstart = 20)

# Profile plot: one line per film across the standardised variables,
# with one facet per cluster.
km %>%
  augment(toclust) %>%
  gather(key = "variável", value = "valor", -title, -.cluster) %>%
  ggplot(aes(x = `variável`, y = valor, group = title, colour = .cluster)) +
  geom_point(alpha = 0.2) +
  geom_line(alpha = .5) +
  facet_wrap(~ .cluster) +
  coord_flip()



Grupo 1 - We Can Do It!

  • Menor Faturamento
  • Mais diálogo para as mulheres
  • Maior taxa de personagens femininos


O Grupo 1 - We Can Do It! é o grupo de filmes de maior representação feminina, quer seja em proporção de personagens femininos, quer seja em proporção de diálogos dedicados a personagens femininos. Existe, porém, uma característica negativa que acompanha este mesmo grupo, pois este é também o grupo das menores taxas de faturamento. Isso sugere uma infeliz associação negativa entre a representação feminina em filmes e o faturamento destes.


O nome do grupo se refere ao famoso cartaz de J. Howard Miller, de 1943, incentivando as mulheres a participar no esforço de guerra nas fábricas.



Grupo 2 - It’s A Man’s Man’s Man’s World

  • Maior faturamento entre todos
  • Menor taxa de diálogo para as mulheres
  • Menor taxa de personagens femininos


O Grupo 2 - It’s A Man’s Man’s Man’s World é o grupo de filmes de menor representação feminina, quer seja em proporção de personagens femininos, quer seja em proporção de diálogos dedicados a personagens femininos. Existe, porém, uma característica negativa que acompanha este mesmo grupo, pois este é também o grupo de maiores taxas de faturamento. Isso sugere uma infeliz associação positiva entre a ausência de representação feminina em filmes e o faturamento destes.


O nome do grupo se refere à música de James Brown, a qual foi escrita por sua então namorada Betty Jean Newsome como um comentário sobre a relação entre os sexos.



Grupo 3 - Em cima do muro

  • Filmes medianos em termos de proporção de personagens femininos, proporção de diálogos dedicados a personagens femininos e faturamento.


O nome do grupo se refere à expressão que significa não tomar partido.


Qualidade da clusterização / Silhueta

# Pairwise distances between the standardised observations.
dists <- dist(scaled_data)

# k-means starts from random centres; fix the seed so the silhouette plot is
# reproducible.
set.seed(123)
km <- kmeans(scaled_data, 3, iter.max = 100, nstart = 20)

# Silhouette plot with one colour per cluster. The palette must have exactly
# as many colours as clusters (3): with any other length, plot.silhouette()
# recycles the colours across observations instead of colouring by cluster.
silhouette(km$cluster, dists) %>%
  plot(col = RColorBrewer::brewer.pal(3, "Set2"), border = NA)


  • O valor de 0.6 da silhueta significa que a nossa clusterização foi razoável. ヾ(⌐■_■)ノ♪
LS0tCnRpdGxlOiAiRGlzdHJpYnVpw6fDo28gZGUgZGlhbMOzZ28gZW0gZmlsbWVzIgphdXRob3I6ICJKb3PDqSBCZW5hcmRpIGRlIFNvdXphIE51bmVzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwotLS0KCgpgYGB7ciBzZXR1cCwgZWNobz1GQUxTRSwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KCmxpYnJhcnkoaGVyZSkKbGlicmFyeShicm9vbSkKbGlicmFyeSh2ZWdhbikKbGlicmFyeShtY2x1c3QpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KE5iQ2x1c3QpCmxpYnJhcnkobGF0dGljZSkKbGlicmFyeShjbHVzdGVyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShnZ2ZvcnRpZnkpCgp0aGVtZV9zZXQodGhlbWVfYncoKSkKYGBgCgojIERhdGEgT3ZlcnZpZXcKCmBgYHtyLCB3YXJuaW5nPUZBTFNFfQpyZWFkcjo6cmVhZF9jc3YoaGVyZSgiZGF0YS9jaGFyYWN0ZXJfbGlzdDUuY3N2IiksCiAgICAgICAgICAgICAgICAgICAgICBwcm9ncmVzcyA9IEZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgY29sX3R5cGVzID0gY29scygKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2NyaXB0X2lkID0gY29sX2ludGVnZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaW1kYl9jaGFyYWN0ZXJfbmFtZSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgd29yZHMgPSBjb2xfaW50ZWdlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBnZW5kZXIgPSBjb2xfY2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFnZSA9IGNvbF9jaGFyYWN0ZXIoKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApKSAlPiUKICBtdXRhdGUoYWdlID0gYXMubnVtZXJpYyhhZ2UpKSAtPiBjaGFyYWN0ZXJzX2xpc3QKCnJlYWRyOjpyZWFkX2NzdihoZXJlKCJkYXRhL21ldGFfZGF0YTcuY3N2IiksCiAgICAgICAgICAgICAgICAgICAgICBwcm9ncmVzcyA9IEZBTFNFLAogICAgICAgICBjb2xfdHlwZXMgPSBjb2xzKAogICAgICAgICAgICAgICAgICAgICAgICBzY3JpcHRfaWQgPSBjb2xfaW50ZWdlcigpLAogICAgICAgICAgICAgICAgICAgICAgICBpbWRiX2lkID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICB0aXRsZSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgeWVhciA9IGNvbF9pbnRlZ2VyKCksCiAgICAgICAgICAgICAgICAgICAgICAgIGdyb3NzID0gY29sX2ludGVnZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgbGluZXNfZGF0YSA9IGNvbF9jaGFyYWN0ZXIoKQogICAgICAgICAgICAgICAgICAgICAgICApKSAlPiUKICBtdXRhdGUo
dGl0bGUgPSBpY29udih0aXRsZSwibGF0aW4xIiwgIlVURi04IikpIC0+IG1ldGFfZGF0YQpgYGAKCjxicj4KCiMjIyMgQ29tYmluYW5kbyBEYWRvcyBPcmlnaW5haXMKCmBgYHtyfQpsZWZ0X2pvaW4oY2hhcmFjdGVyc19saXN0LCAKICAgICAgICAgIG1ldGFfZGF0YSwgCiAgICAgICAgICBieT1jKCJzY3JpcHRfaWQiKSkgJT4lCiAgZ3JvdXBfYnkodGl0bGUsIHllYXIpICU+JQogIGRyb3BfbmEoZ3Jvc3MpICU+JQogIHVuZ3JvdXAoKSAtPiBzY3JpcHRzX2RhdGEKCnNjcmlwdHNfZGF0YSAlPiUKICBnbGltcHNlKCkKYGBgCgpgYGB7cn0Kc2NyaXB0c19kYXRhICU+JQogIG11dGF0ZShmZW1fd29yZHMgPSBpZmVsc2UoZ2VuZGVyID09ICJmIix3b3JkcywwKSwKICAgICAgICAgbWFuX3dvcmRzID0gaWZlbHNlKGdlbmRlciA9PSAibSIsd29yZHMsMCkpICU+JQogIGdyb3VwX2J5KHRpdGxlLCB5ZWFyKSAlPiUKICBtdXRhdGUodG90YWxfZmVtX3dvcmRzID0gc3VtKGZlbV93b3JkcyksCiAgICAgICAgIHRvdGFsX21hbl93b3JkcyA9IHN1bShtYW5fd29yZHMpKSAlPiUKICBmaWx0ZXIodG90YWxfZmVtX3dvcmRzICE9ICAwKSAlPiUKICBmaWx0ZXIodG90YWxfbWFuX3dvcmRzICE9ICAwKSAlPiUKICAgIG11dGF0ZShmX21fcmF0aW8gPSBzdW0oZ2VuZGVyID09ICJmIikvc3VtKGdlbmRlciA9PSAibSIpLAogICAgICAgICAgIGZfbV93b3JkcmF0aW8gPSB0b3RhbF9mZW1fd29yZHMvdG90YWxfbWFuX3dvcmRzKSAlPiUKICB1bmdyb3VwKCkgIC0+IHNjcmlwdHNfZGF0YQoKc2NyaXB0c19kYXRhICU+JQogIHNlbGVjdCh0aXRsZSwKICAgICAgICAgeWVhciwKICAgICAgICAgZl9tX3JhdGlvLAogICAgICAgICBmX21fd29yZHJhdGlvKSAlPiUKICBzYW1wbGVfbigxMCkKYGBgCgojIyBFeHBsb3Jhw6fDo28gZG9zIERhZG9zIAoKIyMjIFByb3BvcsOnw6NvIGVudHJlIGRpYWzDs2dvIGZlbWluaW5vIGUgbWFzY3VsaW5vCgpgYGB7cn0Kc2NyaXB0c19kYXRhICU+JQogIGdyb3VwX2J5KHRpdGxlLHllYXIpICU+JQogIHNsaWNlKDEpICU+JQogIHVuaXF1ZSgpICU+JQogIGdncGxvdChhZXMoeD1mX21fd29yZHJhdGlvLAogICAgICAgICAgICAgeT0oLi5jb3VudC4uKS9zdW0oLi5jb3VudC4uKSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDAuMSwKICAgICAgICAgICAgICAgICBib3VuZGFyeSA9IDAsCiAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwKICAgICAgICAgICAgICAgICBjb2xvciA9ICJibGFjayIpICsKICBsYWJzKHk9IkZyZXF1w6puY2lhIFJlbGF0aXZhIikKYGBgCgoqIEVtIGFsZ3VucyByYXLDrXNzaW1vcyBleGVtcGxvcyBow6EgbXVpdG8gbWFpcyBkaWFsw7NnbyBmZW1pbmlubyBxdWUgZmVtaW5pbm8uIAoKYGBge3J9CnNjcmlwdHNfZGF0YSAlPiUKICBncm91cF9ieSh0aXRsZSx5ZWFyKSAlPiUKICBzbGljZSgxKSAlPiUKICB1bmlxdWUoKSAlPiUKICBmaWx0ZXIoZl9tX3dvcmRyYXRpbyA8IDEwKSAlPiUKICBn
Z3Bsb3QoYWVzKHg9Zl9tX3dvcmRyYXRpbywKICAgICAgICAgICAgIHk9KC4uY291bnQuLikvc3VtKC4uY291bnQuLikpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAwLjEsCiAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwKICAgICAgICAgICAgICAgICBjb2xvciA9ICJibGFjayIpICsKICBsYWJzKHk9IkZyZXF1w6puY2lhIFJlbGF0aXZhIikKYGBgCgoqIFVtYSB2ZXogcXVlIGZpbHRyYW1vcyBvcyBjYXNvcyBtYWlzIHJhcm9zIMOpIHBvc3PDrXZlbCB2ZXIgcXVlIGjDoSB1bWEgZm9ydGUgZG9tw61uaW8gZG8gZGlhbMOzZ28gbWFzY3VsaW5vIHNvYnJlIG8gZmVtaW5pbm8gbm9zIGZpbG1lcy4KCmBgYHtyfQpzY3JpcHRzX2RhdGEgJT4lCiAgZ3JvdXBfYnkodGl0bGUseWVhcikgJT4lCiAgc2xpY2UoMSkgJT4lCiAgdW5pcXVlKCkgJT4lCiAgZ2dwbG90KGFlcyh4PSIiLCAKICAgICAgICAgICAgIHk9Zl9tX3dvcmRyYXRpbykpICsKICBnZW9tX3Zpb2xpbihmaWxsPSJncmV5IiwKICAgICAgICAgICAgICAgd2lkdGg9MC41KQpgYGAKCiogw4kgYWluZGEgbWFpcyDDs2J2aW86CiAgICAqIEEgcHJlc2Vuw6dhIGRlIGFsZ3VucyBwb3Vjb3MgY2Fzb3MgZGUgY29tcGxldG8gZG9tw61uaW8gZG8gZGnDoWxvZ28gZmVtaW5pbm8KICAgICogTyBnZXJhbCBkb23DrW5pbyBkbyBkaWFsw7NnbyBtYXNjdWxpbm8gc29icmUgZmVtaW5pbm8KCiMjIyBQcm9wb3LDp8OjbyBlbnRyZSBwZXJzb25hZ2VucyBmZW1pbmlub3MgZSBtYXNjdWxpbm9zIAoKYGBge3J9CnNjcmlwdHNfZGF0YSAlPiUKICBncm91cF9ieSh0aXRsZSx5ZWFyKSAlPiUKICBzbGljZSgxKSAlPiUKICB1bmlxdWUoKSAlPiUKICBnZ3Bsb3QoYWVzKHg9Zl9tX3JhdGlvLAogICAgICAgICAgICAgeT0oLi5jb3VudC4uKS9zdW0oLi5jb3VudC4uKSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDAuMSwKICAgICAgICAgICAgICAgICBib3VuZGFyeSA9IDAsCiAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwKICAgICAgICAgICAgICAgICBjb2xvciA9ICJibGFjayIpICsKICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzID0gc2VxKDAsMTAsMC41KSkgKwogIGxhYnMoeT0iRnJlcXXDqm5jaWEgUmVsYXRpdmEiKQpgYGAKCiogw4kgbsOtdGlkbyBvIGRvbcOtbmlvIGRlIHBlcnNvbmFnZW5zIG1hc2N1bGlub3MKCmBgYHtyfQpzY3JpcHRzX2RhdGEgJT4lCiAgZ3JvdXBfYnkodGl0bGUseWVhcikgJT4lCiAgc2xpY2UoMSkgJT4lCiAgdW5pcXVlKCkgJT4lCiAgZ2dwbG90KGFlcyh4PSIiLCAKICAgICAgICAgICAgIHk9Zl9tX3JhdGlvKSkgKwogIGdlb21fdmlvbGluKGZpbGw9ImdyZXkiLAogICAgICAgICAgICAgICB3aWR0aD0wLjUpCmBgYAoKKiBBbMOpbSBkbyBmb3J0ZSBkb23DrW5pbyBkZSBwZXJzb25hbmdlbnMgbWFzY3VsaW5vcyDDqSBwb3Nzw612ZWwgdmVyIGEgZXhpc3TDqm5jaWEgZGUgYWxndW1hcyBpbnN0w6JuY2lhcywgZW1ib3JhIHJhcmFz
IGRlIHVtYSBhdmFzc2FsYWRvcmEgcHJlc2Vuw6dhIGZlbWluaW5pbmEsIChlLmcgMTAgdmV6ZXMgbWFpcyBtdWxoZXJlcyBxdWUgaG9tZW5zKS4KCiMjIyBBbm8gZG8gZmlsbWUgCgpgYGB7cn0Kc2NyaXB0c19kYXRhICU+JQogIGdyb3VwX2J5KHRpdGxlLHllYXIpICU+JQogIHNsaWNlKDEpICU+JQogIHVuaXF1ZSgpICU+JQogIGdncGxvdChhZXMoeD15ZWFyKSkgKwogIGdlb21fYmFyKGZpbGwgPSAiZ3JleSIsCiAgICAgICAgICAgY29sb3IgPSAiYmxhY2siKSArCiAgbGFicyh5PSJGcmVxdcOqbmNpYSBBYnNvbHV0YSIpCmBgYAoKKiBPcyBmaWxtZXMgc8OjbyBzdWEgbWFpb3JpYSByZWNlbnRlcywgYSBxdWFzZSB0b3RhbGlkYWRlIGRvcyBmaWxtZXMgZm9pIGxhbsOnYWRhIGEgcGFydGlyIGRvcyBhbm9zIDE5OTAuCgpgYGB7cn0Kc2NyaXB0c19kYXRhICU+JQogIGdyb3VwX2J5KHRpdGxlLHllYXIpICU+JQogIHNsaWNlKDEpICU+JQogIHVuaXF1ZSgpICU+JQogIGdncGxvdChhZXMoeD0iIiwgCiAgICAgICAgICAgICB5PXllYXIpKSArCiAgZ2VvbV92aW9saW4oZmlsbD0iZ3JleSIsCiAgICAgICAgICAgICAgIHdpZHRoPTAuNSkKYGBgCgoqIEFpbmRhIMOpIHBvc3PDrXZlbCB2ZXIgdW1hIHByZXNlbsOnYSByZWxldmFudGUgZGUgZmlsbWVzIGRvIGNvbWXDp28gZG9zIGFub3MgMTk4MC4KKiBFeGlzdGVtIGFsZ3VucyBmaWxtZXMgYW50ZXJpb3JlcyBhb3MgcHLDs3ByaW8gYW5vcyAxOTUwLgoKIyMjIEZhdHVyYW1lbnRvIGRvIGZpbG1lIAoKYGBge3J9CnNjcmlwdHNfZGF0YSAlPiUKICBncm91cF9ieSh0aXRsZSx5ZWFyKSAlPiUKICBzbGljZSgxKSAlPiUKICB1bmlxdWUoKSAlPiUKICBnZ3Bsb3QoYWVzKHg9Z3Jvc3MsCiAgICAgICAgICAgICB5PSguLmNvdW50Li4pL3N1bSguLmNvdW50Li4pKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gNTAsCiAgICAgICAgICAgICAgICAgYm91bmRhcnkgPSAwLAogICAgICAgICAgICAgICAgIGZpbGwgPSAiZ3JleSIsCiAgICAgICAgICAgICAgICAgY29sb3IgPSAiYmxhY2siKSArCiAgbGFicyh5PSJGcmVxdcOqbmNpYSBSZWxhdGl2YSIpCmBgYAoKKiBGYXR1cmFtZW50byBiYWl4byBvdSByYXpvw6F2ZWwgcGFyYSBhIG1haW9yIHBhcnRlIGRvcyBmaWxtZXMuCiogQWxndW5zIHBvdWNvcyBmaWxtZXMgdGl2ZXJhbSB1bSBmYXR1cmFtZW50byBlc21hZ2Fkb3IuCgpgYGB7cn0Kc2NyaXB0c19kYXRhICU+JQogIGdyb3VwX2J5KHRpdGxlLHllYXIpICU+JSAgIAogIHNsaWNlKDEpICU+JQogIHVuaXF1ZSgpICU+JQogIGdncGxvdChhZXMoeD0iIiwgCiAgICAgICAgICAgICB5PWdyb3NzKSkgKwogIGdlb21fdmlvbGluKGZpbGw9ImdyZXkiLAogICAgICAgICAgICAgICB3aWR0aD0wLjUpCmBgYAoKKiBSZXN1bHRhZG9zIHNpbWlsYXJlcyBhb3MgZG8gcmVzcGVjdGl2byBoaXN0b2dyYW1hLgoKIyMgQXBsaWNhbmRvIGVzY2FsYSBhcHJvcHJpYWRhIGFvcyBkYWRvcy4KCmBgYHtyfQpzY3Jp
cHRzX2RhdGEgJT4lCiAgZ3JvdXBfYnkodGl0bGUpICU+JQogIHNsaWNlKDEpICU+JQogIHVuaXF1ZSgpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBzZWxlY3QodGl0bGUsCiAgICAgICAgIGdyb3NzLAogICAgICAgICBmX21fcmF0aW8sCiAgICAgICAgIGZfbV93b3JkcmF0aW8pIC0+IGRhdGEKCnNlbGVjdChkYXRhLCAtdGl0bGUpICU+JQptdXRhdGVfYWxsKGZ1bnMoc2NhbGUpKSAtPiBzY2FsZWRfZGF0YQoKc2NhbGVkX2RhdGEgJT4lIAogIHNhbXBsZV9uKDEwKQpgYGAKCiMgIE7Dum1lcm8gSyDDs3RpbW8gCgojIyBFc3RhdMOtc3RpY2EgR0FQIAoKQSBlc3RhdMOtc3RpY2EgR0FQIGNvbXBhcmEgYSBzb2x1w6fDo28gZG8gYWdydXBhbWVudG8gY29tIGNhZGEgayBjb20gYSBzb2x1w6fDo28gZW0gdW0gZGF0YXNldCBvbmRlIG7Do28gaMOhIGVzdHJ1dHVyYSBkZSBncnVwb3MuIAoKYGBge3J9CnBsb3RfY2x1c2dhcCA9IGZ1bmN0aW9uKGNsdXNnYXAsIHRpdGxlPSJHYXAgU3RhdGlzdGljIGNhbGN1bGF0aW9uIHJlc3VsdHMiKXsKICAgIHJlcXVpcmUoImdncGxvdDIiKQogICAgZ3N0YWIgPSBkYXRhLmZyYW1lKGNsdXNnYXAkVGFiLCBrPTE6bnJvdyhjbHVzZ2FwJFRhYikpCiAgICBwID0gZ2dwbG90KGdzdGFiLCBhZXMoaywgZ2FwKSkgKyBnZW9tX2xpbmUoKSArIGdlb21fcG9pbnQoc2l6ZT01KQogICAgcCA9IHAgKyBnZW9tX2Vycm9yYmFyKGFlcyh5bWF4PWdhcCtTRS5zaW0sIHltaW49Z2FwLVNFLnNpbSksIHdpZHRoID0gLjIpCiAgICBwID0gcCArIGdndGl0bGUodGl0bGUpCiAgICByZXR1cm4ocCkKfQpgYGAKCmBgYHtyfQpnYXBzIDwtIHNjYWxlZF9kYXRhICU+JSAKICAgIGNsdXNHYXAoRlVOID0ga21lYW5zLAogICAgICAgICAgICBuc3RhcnQgPSAyMCwKICAgICAgICAgICAgSy5tYXggPSA4LAogICAgICAgICAgICBCID0gMjAwLAogICAgICAgICAgICBpdGVyLm1heD0zMCkKYGBgCgpgYGB7cn0KcGxvdF9jbHVzZ2FwKGdhcHMpCmBgYAoKKiAzIG91IDYgZ3J1cG9zIHBhcmVjZSBhcHJvcGlhZG8sIG1hcyBjb21vIDYgw6kgcHJlY2VkaWRvIHBvciB1bWEgc8OpcmllIGRlIHF1ZWRhcyAzIHNlcmlhIHVtYSBtZWxob3Igb3DDp8Ojby4KCiMjIEVsYm93IE1ldGhvZAoKYGBge3J9CnNldC5zZWVkKDEyMykKIyBDb21wdXRlIGFuZCBwbG90IHdzcyBmb3IgayA9IDIgdG8gayA9IDE1LgprLm1heCA8LSAxNQoKd3NzIDwtIHNhcHBseSgxOmsubWF4LCAKICAgICAgICAgICAgICBmdW5jdGlvbihrKXtrbWVhbnMoc2NhbGVkX2RhdGEsIGssIG5zdGFydD01MCxpdGVyLm1heCA9IDE1ICkkdG90LndpdGhpbnNzfSkKcGxvdCgxOmsubWF4LCB3c3MsCiAgICAgdHlwZT0iYiIsIHBjaCA9IDE5LCBmcmFtZSA9IEZBTFNFLCAKICAgICB4bGFiPSJOdW1iZXIgb2YgY2x1c3RlcnMgSyIsCiAgICAgeWxhYj0iVG90YWwgd2l0aGluLWNsdXN0ZXJzIHN1bSBvZiBzcXVhcmVzIikKYGBgCgoqIFBlbG8gRWxib3cgbWV0aG9kIDMgcGFyZWNlIHNlciB1bSBi
b20gbsO6bWVybyBkZSBncnVwb3MgZGV2aWRvIMOgIHF1ZWRhIGRlIDMgcGFyYSA0LgoKIyMgQmF5ZXNpYW4gSW5mb3JtYXRpb24gQ3JpdGVyaW9uCgpgYGB7ciBlY2hvPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQpkX2NsdXN0IDwtIE1jbHVzdChhcy5tYXRyaXgoc2NhbGVkX2RhdGEpLCBHPTE6MTUsIAogICAgICAgICAgICAgICAgICBtb2RlbE5hbWVzID0gbWNsdXN0Lm9wdGlvbnMoImVtTW9kZWxOYW1lcyIpKQoKcGxvdChkX2NsdXN0JEJJQykKYGBgCgoqIFZpc3VhbG1lbnRlIEs9IDIgZSBLID0gMyByZXByZXNlbnRhbSBvIGdhbmhvIG1haXMgc2lnbmlmaWNhdGl2byBlbSB0ZXJtb3MgZGUgQklDIChCYXllc2lhbiBJbmZvcm1hdGlvbiBDcml0ZXJpb24pIAoKIyMgSHViZXJ0IEluZGV4IGUgRCBJbmRleAoKYGBge3J9Cm5iIDwtIE5iQ2x1c3Qoc2NhbGVkX2RhdGEsIGRpc3M9TlVMTCwgZGlzdGFuY2UgPSAiZXVjbGlkZWFuIiwgCiAgICAgICAgICAgICAgbWluLm5jPTIsIG1heC5uYz01LCBtZXRob2QgPSAia21lYW5zIiwgCiAgICAgICAgICAgICAgaW5kZXggPSAiYWxsIiwgYWxwaGFCZWFsZSA9IDAuMSkKaGlzdChuYiRCZXN0Lm5jWzEsXSwgYnJlYWtzID0gbWF4KG5hLm9taXQobmIkQmVzdC5uY1sxLF0pKSkKYGBgCgoqIE8gw61uZGljZSBkZSBIdWJlcnQgZSBvIMOtbmRpY2UgRCBzdWdlcmVtIEsgPSAzICBjb21vIGEgbWVsaG9yIHNvbHXDp8OjbwoKIyBLLU1lYW5zIAoKKioqKgoKIyMgQWdydXBhbWVudG8KCmBgYHtyfQpuX2NsdXN0ZXJzID0gMwoKc2NhbGVkX2RhdGEgJT4lCiAgICBrbWVhbnMobl9jbHVzdGVycywgaXRlci5tYXggPSAxMDAsIG5zdGFydCA9IDIwKSAtPiBrbQoKcCA8LSBhdXRvcGxvdChrbSwgZGF0YT1zY2FsZWRfZGF0YSwgZnJhbWUgPSBUUlVFKSAgCgpnZ3Bsb3RseShwKQoKYGBgCgoqIMOJIHBvc3PDrXZlbCB2ZXIgb3MgMyBncnVwb3Mgbml0aWRhbWVudGUgZGlzdGludG9zLCBwb3IgbWVpbyBkZSB1bSB6b29tIHBlcmNlYmUtc2UgcXVlIGVtYm9yYSBvIGdydXBvIDEgZSBvIGdydXBvIDMgZXN0ZWphbSBwcsOzeGltb3MgbyBvdmVybGFwIMOpIGJhc2ljYW1lbnRlIGluZXhpc3RlbnRlLgoKYGBge3IsIHdhcm5pbmc9RkFMU0V9CnJvdy5uYW1lcyhzY2FsZWRfZGF0YSkgPC0gZGF0YSR0aXRsZQoKdG9jbHVzdCA8LSBzY2FsZWRfZGF0YSAlPiUgCiAgICByb3duYW1lc190b19jb2x1bW4odmFyID0gInRpdGxlIikgCgprbSA9IHRvY2x1c3QgJT4lIAogICAgc2VsZWN0KC10aXRsZSkgJT4lIAogICAga21lYW5zKGNlbnRlcnMgPSBuX2NsdXN0ZXJzLCBpdGVyLm1heCA9IDEwMCwgbnN0YXJ0ID0gMjApCgprbSAlPiUgCiAgICBhdWdtZW50KHRvY2x1c3QpICU+JSAKICAgIGdhdGhlcihrZXkgPSAidmFyacOhdmVsIiwgdmFsdWUgPSAidmFsb3IiLCAtdGl0bGUsIC0uY2x1c3RlcikgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYHZhcmnDoXZlbGAsIHkgPSB2YWxvciwgZ3JvdXAgPSB0aXRsZSwgY29sb3VyID0gLmNs
dXN0ZXIpKSArIAogICAgZ2VvbV9wb2ludChhbHBoYSA9IDAuMikgKyAKICAgIGdlb21fbGluZShhbHBoYSA9IC41KSArIAogICAgZmFjZXRfd3JhcCh+IC5jbHVzdGVyKSArCiAgICBjb29yZF9mbGlwKCkKCmBgYAoKPGJyPgoKKioqCgpHcnVwbyAxIC0gKipXZSBDYW4gRG8gSXQhKioKCiAgKiBNZW5vciBGYXR1cmFtZW50byAKICAqIE1haXMgZGlhbMOzZ28gcGFyYSBhcyBtdWxoZXJlcyAKICAqIE1haW9yIHRheGEgZGUgcGVyc29uYWdlbnMgZmVtaW5pbm9zCiAgICAKPGJyPgoKTyBHcnVwbyAxIC0gKipXZSBDYW4gRG8gSXQhKiogw6kgbyBncnVwbyBkZSBmaWxtZXMgZGUgbWFpb3IgcmVwcmVzZW50YcOnw6NvIGZlbWluaW5hLCBxdWVyIHNlamEgZW0gcHJvcG9yw6fDo28gZGUgcGVyc29uYWdlbnMgZmVtaW5pbm9zIGNvbW8gZW0gcHJvcG9yw6fDo28gZGUgZGlhbMOzZ29zIGRlZGljYWRvcyBhIHBlcnNvbmFnZW5zIGZlbWluaW5vcy4gRXhpc3RlIHBvcsOpbSB1bWEgY2FyYWN0ZXLDrXN0aWNhIG5lZ2F0aXZhIHF1ZSBhY29tcGFuaGEgZXN0ZSBtZXNtbyBncnVwbywgcG9pcyBlc3RlIMOpIHRhbWLDqW0gbyBncnVwbyBkYXMgbWVub3JlcyB0YXhhcyBkZSBmYXR1cmFtZW50by4gSXNzbyBzdWdlcmUgdW1hIGluZmVsaXogYXNzb2NpYcOnw6NvIG5lZ2F0aXZhIGVudHJlIGEgcmVwcmVzZW50YcOnw6NvIGZlbWluaW5hIGVtIGZpbG1lcyBlIG8gZmF0dXJhbWVudG8gZGVzdGVzLiAKCjxicj4KCgpgYGAKTyBub21lIGRvIGdydXBvIHNlIHJlZmVyZSBhbyBmYW1vc28gY2FydGF6IGRlIEouIEhvd2FyZCBNaWxsZXIgZGUgMTk0MyBpbmNlbnRpdmFkbyBhcyBtdWxoZXJlcyBhIHBhcnRpY2lwYXIgbm8gZXNmb3LDp28gZGUgZ3VlcnJhIG5hcyBmw6FicmljYXMuIApgYGAKCjxicj4KCioqKgoKR3J1cG8gMiAtICoqSXQncyBBIE1hbidzIE1hbidzIE1hbidzIFdvcmxkKiogICAKCiogTWFpb3IgZmF0dXJhbWVudG8gZW50cmUgdG9kb3MKKiBNZW5vciB0YXhhIGRlIGRpYWzDs2dvIHBhcmEgYXMgbXVsaGVyZXMKKiBNZW5vciB0YXhhIGRlIHBlcnNvbmFnZW5zIGZlbWluaW5vcwogICAgCjxicj4KCk8gR3J1cG8gMiAtICoqSXQncyBBIE1hbidzIE1hbidzIE1hbidzIFdvcmxkKiogw6kgbyBncnVwbyBkZSBmaWxtZXMgZGUgbWVub3IgcmVwcmVzZW50YcOnw6NvIGZlbWluaW5hLCBxdWVyIHNlamEgZW0gcHJvcG9yw6fDo28gZGUgcGVyc29uYWdlbnMgZmVtaW5pbm9zIGNvbW8gZW0gcHJvcG9yw6fDo28gZGUgZGlhbMOzZ29zIGRlZGljYWRvcyBhIHBlcnNvbmFnZW5zIGZlbWluaW5vcy4gRXhpc3RlIHBvcsOpbSB1bWEgY2FyYWN0ZXLDrXN0aWNhIG5lZ2F0aXZhIHF1ZSBhY29tcGFuaGEgZXN0ZSBtZXNtbyBncnVwbywgcG9pcyBlc3RlIMOpIHRhbWLDqW0gbyBncnVwbyBkZSBtYWlvcmVzIHRheGFzIGRlIGZhdHVyYW1lbnRvLiBJc3NvIHN1Z2VyZSB1bWEgaW5mZWxpeiBhc3NvY2lhw6fDo28gcG9zaXRpdmEgZW50cmUgYXVzw6puY2lhIGRlICBy
ZXByZXNlbnRhw6fDo28gZmVtaW5pbmEgZW0gZmlsbWVzIGUgbyBmYXR1cmFtZW50byBkZXN0ZXMuIAoKPGJyPgoKYGBgCk8gbm9tZSBkbyBncnVwbyBzZSByZWZlcmUgw6AgbcO6c2ljYSBkZSBKYW1lcyBCcm93biwgYSBxdWFsIGZvaSBlc2NyaXRhIHBvciBzdWEgZW50w6NvIG5hbW9yYWRhIEJldHR5IEplYW4gTmV3c29tZSBjb21vIHVtIGNvbWVudMOhcmlvIHNvYnJlIGEgcmVsYcOnw6NvIGVudHJlIG9zIHNleG9zLgpgYGAKCjxicj4KCioqKgoKR3J1cG8gMyAtICoqRW0gY2ltYSBkbyBtdXJvKioKCiAgKiBGaWxtZXMgbWVkaWFub3MgZW0gdGVybW9zIGRlIHByb3BvcsOnw6NvIGRlIHBlcnNvbmFnZW5zIGZlbWluaW5vcywgcHJvcG9yw6fDo28gZGUgZGlhbMOzZ29zIGRlZGljYWRvcyBhIHBlcnNvbmFnZW5zIGZlbWluaW5vcyBlIGZhdHVyYW1lbnRvLgogICAgCjxicj4KCmBgYApPIG5vbWUgZG8gZ3J1cG8gc2UgcmVmZXJlIMOgIGV4cHJlc3PDo28gcXVlIHNpZ25pZmljYSBuw6NvIHRvbWFyIHBhcnRpZG8uCmBgYAoKPGJyPgoKIyMgUXVhbGlkYWRlIGRhIGNsdXN0ZXJpemHDp8OjbyAvIFNpbGh1ZXRhCgpgYGB7cn0KZGlzdHMgPSBzY2FsZWRfZGF0YSAlPiUgCiAgZGlzdCgpCgpzY2FsZWRfZGF0YSAlPiUKICAgIGttZWFucygzLCBpdGVyLm1heCA9IDEwMCwgbnN0YXJ0ID0gMjApIC0+IGttCgoKc2lsaG91ZXR0ZShrbSRjbHVzdGVyLCBkaXN0cykgJT4lCiAgIHBsb3QoY29sID0gUkNvbG9yQnJld2VyOjpicmV3ZXIucGFsKDQsICJTZXQyIiksYm9yZGVyPU5BKQpgYGAKCjxicj4KCiogTyB2YWxvciBkZSAwLjYgZGEgc2lsaHVldGEgc2lnbmlmaWNhIHF1ZSBhIG5vc3NhIGNsdXN0ZXJpemHDp8OjbyBmb2kgcmF6b8OhdmVsLiDjg74o4oyQ4pagX+KWoCnjg47imaoKCg==